Undisciplined Art - Chance
Sound of Sight
Lauren Hu
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import librosa as lb
from scipy.signal import stft, istft
import os.path
import IPython.display as ipd
def plti(im, h=5, **kwargs):
    """Plot an image on a new figure with the axes hidden.

    Adapted from
    http://www.degeneratestate.org/posts/2016/Oct/23/image-processing-with-numpy/

    im      image array; im.shape[0] is the row count, im.shape[1] the column count
    h       figure height in inches
    kwargs  forwarded unchanged to plt.imshow
    """
    rows = im.shape[0]
    cols = im.shape[1]
    # figsize is (width, height), so the width must scale with columns/rows
    # for the figure to share the image's aspect ratio.
    w = (cols / rows) * h
    plt.figure(figsize=(w, h))
    plt.imshow(im, interpolation="none", **kwargs)
    plt.axis('off')
def to_grayscale(im, weights = np.c_[0.2989, 0.5870, 0.1140]):
    """Transform a colour image to a greyscale image by taking the weighted sum of its channels.

    http://www.degeneratestate.org/posts/2016/Oct/23/image-processing-with-numpy/

    im       image array, either (rows, cols, channels) or already 2-D (rows, cols)
    weights  one weight per colour channel, in any array shape; flattened before use
    returns  2-D array (rows, cols) of weighted channel sums

    Channels beyond the number of weights (e.g. the alpha channel of an RGBA
    PNG) are ignored. A 2-D input is returned as a float array unchanged in value.
    """
    im = np.asarray(im)
    if im.ndim == 2:
        # Already single-channel: there is nothing to weight.
        return im.astype(float)
    channel_weights = np.asarray(weights).ravel()
    # Broadcasting the weights over the last axis avoids materialising a
    # full-size tiled copy; slicing drops any extra (alpha) channel.
    weighted = im[..., :channel_weights.size] * channel_weights
    return np.sum(weighted, axis=-1)
def imageToAudio(image, rate = 2*22050):
    '''Turn an image file into sound by treating its pixels as a spectrogram.

    The image is read and displayed, reduced to greyscale, flipped along its
    first axis (so the bottom row of the picture becomes row 0 of the matrix)
    and passed through an inverse STFT with scipy's default settings.

    image [input] image file as string
    rate  [input] playback sample rate in Hz; defaults to 2*22050, the value
                  that was previously hard-coded
    [output] IPython.display.Audio player wrapping the audio array obtained
             from the image matrix
    '''
    im = plt.imread(image)
    plti(im)
    spectrum = np.flip(to_grayscale(im), axis=0)
    # istft returns (times, signal); only the signal is needed for playback.
    _, x = istft(spectrum)
    return ipd.Audio(x, rate = rate)
# Load the reference track and keep the sample rate reported by the loader
# instead of repeating it as a literal.
shire, sr_shire = lb.load('TheShire.mp3')
sr = sr_shire
ytime = len(shire)//10  # first tenth of the track, used for the audio preview

# STFT parameters. The overlap is 7/8 of the window, i.e. the hop between
# frames is N/8 samples; both are kept as integers.
N = 2048
hop = N//8
f, t, S = stft(shire, sr, nperseg = N, noverlap = N - hop)

# Only the lowest maxN frequency bins are plotted; bins are sr/N Hz apart.
freqstep = sr/N
maxN = 200
freqmax = maxN*freqstep

# Display explicitly: a bare expression is only rendered when it is the last
# statement of a notebook cell.
ipd.display(ipd.Audio(shire[0:ytime], rate = sr))

# One tick every 30 seconds, derived from the actual track length. The x axis
# is in STFT frames, and frame k starts at k*hop/sr seconds.
duration = len(shire)/sr
tick_seconds = range(0, int(duration) + 1, 30)
tick_frames = [s*sr/hop for s in tick_seconds]
tick_labels = ['{}:{:02d}'.format(s//60, s%60) for s in tick_seconds]

plt.figure(figsize=(18,10))
plt.imshow(np.abs(S[0:maxN,:]), cmap='inferno', vmin=0, vmax=0.01, origin='lower', extent=(0,S.shape[1],0,freqmax))
plt.xticks(tick_frames, tick_labels)
plt.xlim(0, S.shape[1])  # keep the view on the data even if a tick sits at the edge
plt.xlabel('Time [minutes]')
plt.ylabel('Frequency [Hz]')
plt.title('Spectrogram of Shire')
plt.show()
# Sonify each demo image. The returned player is displayed explicitly because
# a bare call is only rendered when it is the last statement of a notebook cell.
for image_path in (
    'diagonal2.png',
    'derpcorn.png',
    # https://www.sothebys.com/en/articles/sothebys-gets-banksyed-at-contemporary-art-auction-in-london
    'banksky.jpg',
    'trump.jpg',
    'monalisa.jpg',
):
    ipd.display(imageToAudio(image_path))